# Libraries
if (!require("pacman")) install.packages("pacman")
pacman::p_load(dplyr, ggplot2, tidyr, cowplot, viridis, forcats, radiant.data, purrr, phangorn, gridExtra, ggtree, htmlTable, ape, cluster, tibble, Biostrings, seqinr, stringr, scales)# Functions
mismatches <- function(query, ref) {
pairwiseAlignment(ref, query, substitutionMatrix = "BLOSUM50",
gapOpening = 3, gapExtension = 1) %>%
mismatchTable() %>%
mutate(ID=names(query),
Pos=PatternStart,
Reference_AA=as.character(PatternSubstring),
Sample_AA=as.character(SubjectSubstring)) %>%
select(ID, Reference_AA, Sample_AA, Pos) %>%
filter(Sample_AA != "X")
}
# Import sequence data
gyrA <- readAAStringSet("../data/Prokka/DNA_gyrase_GYRA_aa_sequences.fa")
parC <- readAAStringSet("../data/Prokka/Topoisomerase_IV_subunit_A_aa_sequences.fa")
parE <- readAAStringSet("../data/Prokka/DNA_topoisomerase_IV_subunit_B_aa_sequences.fa")
# Identify results from ARIBA
mut_gyrA <- mut_quant %>%
select(id, gyrA) %>%
mutate(entries = sapply(strsplit(.$gyrA, ","),
FUN = function(x) {length(x)})) %>%
separate(gyrA, into = as.character(c(1:max(.$entries)))) %>%
select(-entries) %>%
gather(key, value, `1`,`2`) %>%
filter(is.na(value) == FALSE) %>%
select(-key) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup()
# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
gyrA_results <- bind_rows(lapply(seq_along(gyrA[-1]),
function(i) mismatches(gyrA[i + 1],
gyrA[1]))) %>%
mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
select(ID, mutation) %>%
mutate(ID = sub(".faa", "", ID)) %>%
dplyr::rename("ref" = ID,
"gyrA_prokka" = mutation) %>%
left_join(id_data, by = "ref") %>%
select(id, gyrA_prokka) %>%
mutate(gyrA_result = gyrA_prokka %>% # control for mutation within QRDR for gyrA
str_extract_all("\\d+") %>% # from reprex package
map(as.integer) %>% # converts all to integer
map_lgl(~ any(.x >= 67L & .x <= 106L))) %>%
filter(gyrA_result == TRUE) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup() %>%
left_join(mut_gyrA, by = c("id","test")) %>%
select(-test) %>%
dplyr::rename("gyrA_ariba" = value) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
select(-gyrA_result)
# Identify results from ARIBA
mut_parC <- mut_quant %>%
select(id, parC) %>%
mutate(entries = sapply(strsplit(.$parC, ","), FUN = function(x) {length(x)})) %>%
separate(parC, into = as.character(c(1:max(.$entries)))) %>%
select(-entries) %>%
gather(key, value, `1`,`2`) %>%
filter(is.na(value) == FALSE) %>%
select(-key) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup()
# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
parC_results <- bind_rows(lapply(seq_along(parC[-1]),
function(i) mismatches(parC[i + 1],
parC[1]))) %>%
mutate(Pos = Pos + 22) %>%
mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
select(ID, mutation) %>%
mutate(ID = sub(".faa", "", ID)) %>%
dplyr::rename("ref" = ID,
"parC_prokka" = mutation) %>%
left_join(id_data, by = "ref") %>%
select(id, parC_prokka) %>%
mutate(parC_result = parC_prokka %>%
str_extract_all("\\d+") %>%
map(as.integer) %>%
map_lgl(~ any(.x >= 51L & .x <= 170L))) %>%
filter(parC_result == TRUE) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup() %>%
left_join(mut_parC, by = c("id","test")) %>%
select(-test) %>%
dplyr::rename("parC_ariba" = value) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
select(-parC_result)
# Identify results from ARIBA
mut_parE <- mut_quant %>%
select(id, parE) %>%
mutate(entries = sapply(strsplit(.$parE, ","), FUN = function(x) {length(x)})) %>%
separate(parE, into = as.character(c(1:max(.$entries)))) %>%
select(-entries) %>%
gather(key, value, `1`,`2`) %>%
filter(is.na(value) == FALSE) %>%
select(-key) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup()
# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
parE_results <- bind_rows(lapply(seq_along(parE[-1]),
function(i) mismatches(parE[i + 1],
parE[1]))) %>%
#mutate(Pos = Pos + 22) %>%
mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
select(ID, mutation) %>%
mutate(ID = sub(".faa", "", ID)) %>%
dplyr::rename("ref" = ID,
"parE_prokka" = mutation) %>%
left_join(id_data, by = "ref") %>%
select(id, parE_prokka) %>%
mutate(parE_result = parE_prokka %>%
str_extract_all("\\d+") %>%
map(as.integer) %>%
map_lgl(~ any(.x >= 366L & .x <= 523L))) %>%
filter(parE_result == TRUE) %>%
group_by(id) %>%
mutate(test = 1:n()) %>%
ungroup() %>%
left_join(mut_parE, by = c("id","test")) %>%
select(-test) %>%
dplyr::rename("parE_ariba" = value) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
select(-parE_result)
full_report <- gyrA_results %>%
left_join(parC_results, by = "id") %>%
left_join(parE_results, by = "id") %>%
group_by(id) %>%
summarise_all(funs(func_paste)) %>%
mutate(parC_ariba = ifelse(parC_prokka == "S80I" &
parC_ariba == "S58I", "S80I", parC_ariba))
full_reportisolate_data_complete <- isolate_data %>%
left_join(no_of_reads, by = "id")
isolate_data_completeThis table lists all the genes of interest in this study, some information about them, and their accession number(s).
#
# acquired_ref_names <- acquired_data$ref_name
# acquired_ref <- c()
#
# for (i in ac_genes) {
# for (j in acquired_ref_names) {
# if (grepl(i,j, ignore.case = T) == TRUE) {
# acquired_ref <- c(acquired_ref, j)
# }
# }
# }
#
# acquired_ref <- unique(acquired_ref)
#
# acquired_ref_genes <- acquired_data %>%
# mutate(test = duplicated(ref_name),
# gene = case_when(grepl("qnra1", ref_name, ignore.case = T) == TRUE ~ "qnrA1",
# grepl("qnrb19", ref_name, ignore.case = T) == TRUE ~ "qnrB19",
# grepl("qnrb6", ref_name, ignore.case = T) == TRUE ~ "qnrB6",
# grepl("qnrb60", ref_name, ignore.case = T) == TRUE ~ "qnrB60",
# grepl("qnrs1", ref_name, ignore.case = T) == TRUE ~ "qnrS1",
# grepl("qnrs2", ref_name, ignore.case = T) == TRUE ~ "qnrS2",
# grepl("qnrs4", ref_name, ignore.case = T) == TRUE ~ "qnrs4",
# grepl("qnrs9", ref_name, ignore.case = T) == TRUE ~ "qnrS9",
# grepl("qep", ref_name, ignore.case = T) == TRUE ~ "qepA")) %>%
# filter(test == FALSE,
# ref_name %in% acquired_ref_names) %>%
# select(ref_name, gene) %>%
# mutate(compl = complete.cases(.)) %>%
# filter(compl == TRUE) %>%
# mutate(ref_name2 = sub(".+_(.*?)", "\\1", ref_name)) %>%
# group_by(gene) %>%
# summarise_all(funs(func_paste)) %>%
# select(gene, ref_name2) %>%
# dplyr::rename("Accession number(s)" = ref_name2,
# "Gene" = gene) %>%
# mutate(Description = "Quinolone resistance protein, plasmid mediated, decreases susceptibility of quinolones",
# Type = "Acquired")
#
# df_ref_names <- mut_data$cluster
# ref_names <- c()
#
# for (i in mut_genes) {
# for (j in df_ref_names) {
# if (grepl(i,j, ignore.case = T) == TRUE) {
# ref_names <- c(ref_names, j)
# }
# }
# }
#
# ref_names <- unique(ref_names)
#
# intrinsic_ref_genes <- mut_data %>%
# mutate(test = duplicated(cluster),
# gene = case_when(grepl("gyra", cluster, ignore.case = T) == TRUE ~ "gyrA",
# grepl("gyrb", cluster, ignore.case = T) == TRUE ~ "gyrB",
# grepl("parc", cluster, ignore.case = T) == TRUE ~ "parC",
# grepl("pare", cluster, ignore.case = T) == TRUE ~ "parE",
# grepl("soxS", cluster, ignore.case = T) == TRUE ~ "soxS",
# grepl("marR", cluster, ignore.case = T) == TRUE ~ "marR",
# grepl("mara", cluster, ignore.case = T) == TRUE ~ "marA",
# grepl("rpob", ref_name, ignore.case = T) == TRUE ~ "rpoB",
# grepl("rob", ref_name, ignore.case = T) == TRUE ~ "robA")) %>%
# filter(test == FALSE,
# cluster %in% ref_names) %>%
# select(ref_name, gene, free_text) %>%
# mutate(ref_name2 = sub("^.*?_([A-Z]+[0-9_.]*[0-9]).*", "\\1", ref_name)) %>%
# select(-ref_name) %>%
# group_by(gene) %>%
# summarise_all(funs(func_paste)) %>%
# select(gene, ref_name2) %>%
# dplyr::rename("Accession number(s)" = ref_name2,
# "Gene" = gene)
# # mutate(Description = case_when(Gene == "gyrA" ~ ,
# # Gene == "gyrB" ~ ,
# # Gene == "marR" ~ ,
# # Gene == "marA" ~ ,
# # Gene == "parC" ~ ,
# # Gene == "parE" ~ ,
# # Gene == "soxS" ~ "Transcriptional activator of superoxide response regulon",
# # Gene == "rpoB" ~ "RNA polymerase subunit B",
# # Gene == "robA" ~ ),
# # Type = "Intrinsic gene")
#
# ref_genes <- rbind(intrinsic_ref_genes, acquired_ref_genes)
#
# ref_genesper_species <- isolate_data %>%
group_by(species) %>%
dplyr::count()
per_speciestot_mlst <- mlst_data[,c("id","ST")] %>%
group_by(ST) %>%
dplyr::count() %>%
arrange(desc(n))
print(paste0("Total amount of different sequence types: ", nrow(tot_mlst)))## [1] "Total amount of different sequence types: 83"
tot_mlstmlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
group_by(ST, species) %>%
dplyr::count() %>%
arrange(desc(n))mlst_n <- mlst_data[,c("id","ST")] %>%
group_by(ST) %>%
dplyr::count() %>%
filter(n > 3)
mlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
spread(ST, species) %>%
summarise_all(funs(func_paste)) %>%
select(-id) %>%
gather(ST, species) %>%
filter(ST %in% mlst_n$ST)mlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
group_by(species) %>%
mutate(n = length(unique(ST))) %>%
select(species,n) %>%
summarise_all(funs(func_paste))acquired_report %>%
select(-id) %>%
mutate_all(funs(as.numeric(.))) %>%
mutate(total = rowSums(.),
result = if_else(total != 0, 1, 0)) %>%
group_by(result) %>%
dplyr::count() %>%
ungroup() %>%
mutate(result = if_else(result == 0, "Absent", "Present")) %>%
spread(result, n) %>%
mutate(Total = Present + Absent,
Percent = round(Present/Total * 100,1))acquired_report %>%
select(-c(qepA4, id)) %>%
mutate_all(funs(as.numeric)) %>%
dplyr::rename("qnrA" = qnrA1) %>%
mutate(qnrB = ifelse(rowSums(.[,2:4]) >= 1, 1, 0),
qnrS = ifelse(rowSums(.[,5:7]) >= 1, 1, 0)) %>%
select(qnrA, qnrB, qnrS) %>%
gather(gene, value) %>%
group_by(gene, value) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 0, "Absent", "Present")) %>%
spread(value, n) %>%
mutate(Total = Present + Absent,
Percent = round(Present/Total * 100,1))acquired_report %>%
select(-id) %>%
t() %>%
as.data.frame(.) %>%
mutate(Gene = rownames(.)) %>%
mutate_at(vars(-Gene),
funs(as.numeric(as.character(.)))) %>%
mutate(Present = rowSums(.[,-281])) %>%
select(c(Gene, Present)) %>%
rowwise() %>%
mutate(Total = 280,
Percent = round(Present/Total * 100,1))qnr_genes <- c("qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")
acquired_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, qnr_genes) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round(Present / Total * 100, 1)) %>%
arrange(key, desc(Percent))mut_report %>%
select(gyrA, gyrB, parC, parE) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate_at(vars(c(gyrA, gyrB, parC, parE)),
.funs = funs(as.numeric)) %>%
mutate(type = ifelse(rowSums(.[,1:4]) == 0, 1, 0)) %>%
select(n, type) %>%
group_by(type) %>%
summarise_all(funs(sum)) %>%
mutate(type = ifelse(type == 0, "Present", "Absent")) %>%
spread(type, n) %>%
mutate(Total = Present + Absent,
Percent = round(Present/Total * 100,1)) %>%
select(-Total)mut_report %>%
select(-nt_change) %>%
gather(gene, value, -id) %>%
group_by(gene, value) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
mutate(Total = Present + Absent,
Percent = round(Present/Total * 100,1)) %>%
arrange(desc(Percent))suppressWarnings(clean_mut_data %>%
mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
ref_nt = ifelse(ref_nt == ".", "", ref_nt),
ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
mutation = ifelse(mutation == "/-", "0", mutation)) %>%
select(ref, gene_names, flag, mutation) %>%
filter(flag %in% flag_selection,
gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
mutate(id = 1:n()) %>%
spread(gene_names, mutation) %>%
group_by(ref) %>%
summarise_all(funs(func_paste2)) %>%
select(-c(id, flag)) %>%
left_join(id_data, by = "ref") %>%
dplyr::select(id, everything(), -ref) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
gather(gene, mut, -id) %>%
mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
result = ifelse(mut != 0, 1, 0),
result = as.integer(result),
type = "mut") %>%
separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
gather(mut, value, paste("v", 1:12, sep = "_")) %>%
separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
select(-mut) %>%
dplyr::rename("mut" = aa_change) %>%
filter(is.na(nt_change) == FALSE) %>%
fix_gyr_par_results(.) %>%
filter(result_total == 1) %>%
group_by(gene, mut, nt_change) %>%
dplyr::count() %>%
separate(nt_change, into = c("Reference nt", "Contig nt"), sep = "-") %>%
rowwise() %>%
mutate(Total = 280,
Percent = round(n/Total * 100,1)) %>%
dplyr::rename("Mutation" = mut) %>%
arrange(gene, desc(Percent)))suppressWarnings(clean_mut_data %>%
mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
ref_nt = ifelse(ref_nt == ".", "", ref_nt),
ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
mutation = ifelse(mutation == "/-", "0", mutation)) %>%
select(ref, gene_names, flag, mutation) %>%
filter(flag %in% flag_selection,
gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
mutate(id = 1:n()) %>%
spread(gene_names, mutation) %>%
group_by(ref) %>%
summarise_all(funs(func_paste2)) %>%
dplyr::select(-c(id, flag)) %>%
left_join(id_data, by = "ref") %>%
dplyr::select(id, everything(), -ref) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
gather(gene, mut, -id) %>%
mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
result = ifelse(mut != 0, 1, 0),
result = as.integer(result),
type = "mut") %>%
separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
gather(mut, value, paste("v", 1:12, sep = "_")) %>%
separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
select(-mut) %>%
dplyr::rename("mut" = aa_change) %>%
filter(is.na(nt_change) == FALSE) %>%
fix_gyr_par_results(.) %>%
filter(result_total == 1) %>%
left_join(isolate_data[c("id","species")], by = "id") %>%
group_by(gene, mut, species) %>%
dplyr::count() %>%
rowwise() %>%
mutate(Total = case_when(species == "Broiler" ~ 87,
species == "Pig" ~ 75,
species == "Red fox" ~ 52,
species == "Wild bird" ~ 66),
Percent = round(n/Total * 100,1)) %>%
dplyr::rename("Mutation" = mut) %>%
arrange(gene, desc(Percent)))mut_quant %>%
group_by(gyrA, gyrB, parC, parE) %>%
dplyr::count() %>%
rowwise() %>%
mutate(Total = 280,
Percent = round(n/Total * 100,1)) %>%
arrange(desc(Percent))gene_names <- names(mut_report)
gene_names <- gene_names[-c(1,2)]
mut_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, gene_names) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round(Present / Total * 100, 1)) %>%
dplyr::rename("Gene" = key,
"Species" = species) %>%
dplyr::select(Gene, Species, Present, Total, Percent) %>%
arrange(Gene, desc(Percent))These mutations are the most prominent in each gene where mutations were identified (n > 2). The percentage is based on how many of the total isolates with mutations in the respective gene had the specific mutation in question.
mut_filtered %>%
filter(gene %not_in% c("gyrA","gyrB","parC","parE")) %>%
select(id, gene, mut) %>%
mutate(entries = sapply(strsplit(.$mut, ","),
FUN = function(x) {length(x)})) %>%
separate(mut, into = paste("v", 1:max(.$entries), sep = "_"), sep = ",") %>%
gather(mut, value, paste("v", 1:max(.$entries), sep = "_")) %>%
mutate(value = str_trim(value)) %>%
select(id, gene, value) %>%
filter(value != 0,
is.na(value) == FALSE) %>%
group_by(gene, value) %>%
dplyr::count() %>%
ungroup() %>%
filter(n > 2) %>%
mutate(percent = case_when(
gene == "gadX" ~
round(n / length(which(mut_report[, "gadX"] == 1))*100, 1),
gene == "marA" ~
round(n / length(which(mut_report[, "marA"] == 1))*100, 1),
gene == "marR" ~
round(n / length(which(mut_report[, "marR"] == 1))*100, 1),
gene == "rpoB" ~
round(n / length(which(mut_report[, "rpoB"] == 1))*100, 1),
gene == "soxS" ~
round(n / length(which(mut_report[, "soxS"] == 1))*100, 1))) %>%
arrange(gene, desc(percent)) %>%
dplyr::rename("Gene" = gene,
"Mutation" = value,
"Percent" = percent)mut_report %>%
filter(gyrA == 0,
parC == 0,
parE == 0) %>%
left_join(acquired_report, by = "id") %>%
mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,14:20]) == 0, 0, 1)) %>%
select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
select(-c(nt_change, id)) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
dplyr::select(-c(gyrA,gyrB,parC,parE))mut_report %>%
select(-c(nt_change, gyrA, parC, parE)) %>%
left_join(mut_quant[c("gyrA","parC","parE","id")], by = "id") %>%
left_join(acquired_report, by = "id") %>%
mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,13:19]) == 0, 0, 1)) %>%
left_join(mic_data[c("id","CIP","NAL")], by = "id") %>%
select(-c(id,qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
group_by_all() %>%
dplyr::count() %>%
arrange(desc(CIP))do_chisq_test <- function(df) {
df <- df %>%
dplyr::slice(expand.grid(1:length(unique(species)), 1:length(unique(species))) %>% rev %>%
dplyr::filter(Var2 < Var1) %>% t) %>%
mutate(test = rep(1:(n()/2), each = 2)) %>%
group_by(test) %>%
dplyr::do(data_frame(gene = .$key,
test = dplyr::first(.$test),
species1 = dplyr::first(.$species),
species2 = dplyr::last(.$species),
data = list(matrix(c(.$Absent,
.$Present),
ncol = 2)))) %>%
mutate(chi_test = map(data, chisq.test, correct = FALSE)) %>%
mutate(p.value = map_dbl(chi_test, "p.value"),
dupl = duplicated(test)) %>%
dplyr::filter(dupl == FALSE) %>%
ungroup() %>%
select(gene, species1, species2, p.value) %>%
mutate(p.value = round(p.value, 5)) %>%
dplyr::filter(p.value <= 0.05)
return(df)
}
total_df <- acquired_report %>%
left_join(mut_report, by = "id") %>%
left_join(isolate_data[c("id", "species")], by = "id") %>%
select(-nt_change)
mechanism_per_species <- total_df %>%
select(id, species, everything()) %>%
select(-id) %>%
gather(key, value, -species) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
select(key, species, Present, Absent) %>%
split(f = .$key)
intrinsic_names <- names(mut_report)
intrinsic_names <- intrinsic_names[-c(1,2)]
intrinsic_gene_perc <- mut_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, intrinsic_names) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round(Present / Total * 100, 1)) %>%
select(key, species, Percent) %>%
dplyr::rename("gene" = key)
acquired_names <- names(acquired_report)
acquired_names <- acquired_names[-1]
acquired_genes_perc <- acquired_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, acquired_names) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round(Present / Total * 100, 1)) %>%
select(key, species, Percent) %>%
dplyr::rename("gene" = key)
total_perc <- rbind(intrinsic_gene_perc, acquired_genes_perc)
p_values_total <- suppressWarnings(lapply(mechanism_per_species, function(x) do_chisq_test(x))) %>%
bind_rows() %>%
left_join(total_perc, by = c("gene", c("species1" = "species"))) %>%
dplyr::rename("Percent species 1" = Percent) %>%
left_join(total_perc, by = c("gene", c("species2" = "species"))) %>%
dplyr::rename("Percent species 2" = Percent) %>%
dplyr::select(gene, species1, `Percent species 1`, species2, `Percent species 2`, p.value)
p_values_totalpercent_am_res <- mic_data %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
select(species,contains("_R")) %>%
gather(compound, value, -species) %>%
mutate(compound = sub("_R", "", compound)) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round((Present/Total)*100, 1))
percent_am_resmut_report %>%
left_join(acquired_report, by = "id") %>%
left_join(mlst_data[,c("id","ST")], by = "id") %>%
select(-c(nt_change, id)) %>%
gather(gene, value, -ST) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
filter(Present != 0) %>%
mutate(Total = Present + Absent) %>%
filter(Total > 2) %>%
rowwise() %>%
mutate(Percent = round(Present/Total*100, 1)) %>%
ungroup() %>%
group_by(ST) %>%
mutate(no_of_mechanisms = n())mut_report %>%
left_join(acquired_report, by = "id") %>%
left_join(mlst_data[,c("id","ST")], by = "id") %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
select(-c(nt_change, id)) %>%
gather(gene, value, -c(ST, species)) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
filter(Present != 0) %>%
mutate(Total = Present + Absent) %>%
filter(Total > 2) %>%
rowwise() %>%
mutate(Percent = round(Present/Total*100, 1)) %>%
ungroup() %>%
group_by(ST, species) %>%
mutate(no_of_mechanisms = n()) %>%
arrange(ST, species)cip_nal_res <- mic_data %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
select(species,contains("_R")) %>%
gather(compound, value, -species) %>%
mutate(compound = sub("_R", "", compound)) %>%
group_by_all() %>%
dplyr::count() %>%
filter(compound %in% c("CIP","NAL")) %>%
ungroup() %>%
mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
spread(value, n, fill = 0) %>%
select(compound, species, Present, Absent) %>%
dplyr::rename("key" = compound) %>%
split(f = .$key)
p_values_res <- suppressWarnings(lapply(cip_nal_res, function(x) do_chisq_test(x))) %>%
bind_rows() %>%
left_join(percent_am_res[,c("species","compound","Percent")], by = c("species1" = "species",
"gene" = "compound")) %>%
dplyr::rename("Percent_species1" = Percent) %>%
left_join(percent_am_res[,c("species","compound","Percent")], by = c("species2" = "species",
"gene" = "compound")) %>%
dplyr::rename("Percent_species2" = Percent,
"Compound" = gene) %>%
select(Compound, species1, Percent_species1, species2, Percent_species2, p.value)
p_values_resrest_res <- mic_data %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
select(species,contains("_R")) %>%
gather(compound, value, -species) %>%
mutate(compound = sub("_R", "", compound)) %>%
group_by_all() %>%
dplyr::count() %>%
filter(compound %not_in% c("CIP","NAL")) %>%
ungroup() %>%
mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
spread(value, n, fill = 0) %>%
select(compound, species, Present, Absent) %>%
dplyr::rename("key" = compound) %>%
split(f = .$key)
p_values_res_rest <- suppressWarnings(lapply(rest_res, function(x) do_chisq_test(x))) %>%
bind_rows() %>%
left_join(percent_am_res[,c("species","compound","Percent")], by = c("species1" = "species",
"gene" = "compound")) %>%
dplyr::rename("Percent_species1" = Percent) %>%
left_join(percent_am_res[,c("species","compound","Percent")], by = c("species2" = "species",
"gene" = "compound")) %>%
dplyr::rename("Percent_species2" = Percent,
"Compound" = gene) %>%
select(Compound, species1, Percent_species1, species2, Percent_species2, p.value)
p_values_res_restAddCol <- function(df, col_name) {
# split rows by delimeters
string_to_proc <- df %>% select(!!col_name) %>%
base::unlist() %>% str_split(regex("\\, |\\,"))
# find unique entries
unique_strings <- string_to_proc %>%
base::unlist() %>% base::unique()
# construct names of the new columns
cols_names <- paste(col_name, unique_strings, sep = "_")
# construct 0/1-content columns for each unique entry
cols_content <- sapply(function(i) {
as.integer(base::unlist(lapply(function(Z) any(Z %in% unique_strings[i]),
X = string_to_proc)))
}, X = seq_along(unique_strings))
res <- data.frame(cols_content)
names(res) <- cols_names
return(res)
}
create_corr_matrix <- function(df) {
df <- df %>%
as.matrix %>%
cor(method = "spearman") %>%
as.data.frame %>%
rownames_to_column(var = "var1") %>%
gather(var2, value, -var1) %>%
mutate(var1 = gsub("(.*?)_R$", "\\1", var1),
var2 = gsub("(.*?)_R$", "\\1", var2))
return(df)
}
col_proc <- c("gyrA","gyrB","parC","parE")
cols_to_add <- sapply(function(i) {AddCol(df = mut_quant, col_name = col_proc[i])},
X = seq_along(col_proc)) %>%
bind_cols() %>%
select(-c(gyrA_0, parC_0, gyrB_0, parE_0)) %>%
mutate_all(funs(as.numeric)) %>%
{. ->> tot_mut_report} %>%
select(gyrA_S83L, gyrA_D87N, parC_S80I) %>%
mutate(gyrA_S83L_D87N = ifelse(gyrA_S83L == 1 & gyrA_D87N == 1, 1, 0),
gyrA2_parC1 = ifelse(gyrA_S83L == 1 & gyrA_D87N == 1 & parC_S80I == 1, 1, 0))
test <- mut_quant %>%
cbind(tot_mut_report) %>%
select(-contains("mut")) %>%
select(-c(gyrA, gyrB, parC, parE)) %>%
left_join(acquired_report, by = "id") %>%
mutate_at(vars(-id),
funs(as.numeric)) %>%
mutate(qnr = ifelse(rowSums(.[,21:27]) == 0, 0, 1)) %>%
select(-id)
test2 <- create_corr_matrix(test)## Warning in cor(., method = "spearman"): the standard deviation is zero
cor.test(test$gyrA_S83L, test$qnr)##
## Pearson's product-moment correlation
##
## data: test$gyrA_S83L and test$qnr
## t = -15.973, df = 278, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7483113 -0.6252511
## sample estimates:
## cor
## -0.6917707
palette4 <- c("Livestock" = "#80b1d3",
"Wild" = "#fb8072")
mlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
mutate(animal_group = ifelse(species %in% c("Broiler","Pig"), "Livestock", "Wild")) %>%
group_by(ST, animal_group) %>%
dplyr::count() %>%
group_by(ST) %>%
mutate(total = dplyr::first(n) + dplyr::last(n)) %>%
filter(total > 4) %>%
ggplot(aes(factor(ST), n, fill = animal_group))+
geom_col(color = "black")+
labs(x = "Sequence Types")+
scale_fill_manual(values = palette4)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.45),
legend.title = element_blank())genes <- c(names(mut_report), names(acquired_report), "qnr")
genes <- genes[!genes %in% c("id","nt_change","qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")]
no_of_mlst <- mlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
group_by(ST, species) %>%
dplyr::count() %>%
group_by(ST) %>%
mutate(group_n = sum(n)) %>%
filter(group_n > 3)
mlst_mechanisms_report <- mut_report %>%
left_join(acquired_report, by = "id") %>%
mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,14:20]) == 0, 0, 1)) %>%
select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4,nt_change)) %>%
gather(gene, value, -id) %>%
mutate(present = ifelse(value == 1, gene, NA),
ref = 1:n()) %>%
spread(gene, value) %>%
group_by(id) %>%
summarise_all(funs(func_paste)) %>%
left_join(mlst_data[c("id","ST")], by = "id") %>%
select(-c(ref, id)) %>%
ungroup() %>%
mutate_at(vars(-present, ST), .funs = as.numeric) %>%
mutate(none = ifelse(rowSums(.[,genes]) == 0, 1, 0),
present = ifelse(present == "", "None", present)) %>%
gather(gene, value, -c(present,ST)) %>%
mutate(gene = ifelse(value == 0 & gene != "None", NA, gene),
value = ifelse(gene == "None" & value == 0, NA, value)) %>%
na.omit() %>%
group_by(ST, gene, value) %>%
dplyr::count() %>%
filter(ST %in% no_of_mlst$ST)
palette <- c("Broiler" = "#4575b4",
"Pig" = "#74add1",
"Red fox" = "#f46d43",
"Wild bird" = "#fdae61")
p1 <- ggplot(mlst_mechanisms_report, aes(factor(ST), gene, size = n))+
geom_point()+
guides(size = FALSE)+
labs(x = "Sequence Types",
y = "Genes")+
theme(axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.ticks.x = element_blank())
p2 <- ggplot(no_of_mlst, aes(factor(ST), n, fill = species))+
geom_col(color = "black")+
scale_fill_manual(values = palette)+
guides(fill = FALSE)+
theme(axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank())
plot_grid(p2, p1, nrow = 2, align = "v")total_mechanisms <- mut_report %>%
select(-nt_change) %>%
left_join(acquired_report, by = "id") %>%
mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,13:19]) == 0, 0, 1)) %>%
select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
gather(gene, result, -id) %>%
group_by(gene, result) %>%
dplyr::count() %>%
ungroup() %>%
mutate(result = ifelse(result == 1, "Present", "Absent")) %>%
spread(result, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round((Present/Total)*100, 1),
lwr = round(get_binCI(Present, Total)[1], 1),
upr = round(get_binCI(Present, Total)[2], 1),
type = ifelse(gene %in% c("qnr","qepA4"), "Acquired","Intrinsic"))
palette2 <- c("Acquired" = "#8dd3c7",
"Intrinsic" = "#80b1d3")
ggplot(total_mechanisms, aes(reorder(gene, -Percent), Percent, fill = type)) +
geom_col(color = "black") +
geom_errorbar(aes(ymin = lwr, ymax = upr),
width = 0.5)+
scale_fill_manual(values = palette2) +
labs(x = "Genes",
y = "Percent (%) Isolates",
fill = NULL)gene_names <- names(mut_report)
gene_names <- gene_names[-c(1,2)]
palette <- c("Broiler" = "#4575b4",
"Pig" = "#74add1",
"Red fox" = "#f46d43",
"Wild bird" = "#fdae61")
mut_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, gene_names) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(
Total = Present + Absent,
Percent = round(Present / Total * 100, 1),
lwr = round(get_binCI(Present, Total)[1], 1),
upr = round(get_binCI(Present, Total)[2], 1)
) %>%
mutate(species = factor(species,
levels = c("Broiler","Pig","Wild bird","Red fox"))) %>%
ggplot(aes(reorder(key, -Percent), Percent, fill = species)) +
geom_col(color = "black",
position = position_dodge(1)) +
geom_errorbar(
aes(ymin = lwr, ymax = upr),
width = 0.6,
position = position_dodge(1)
) +
scale_fill_manual(values = palette) +
labs(y = "Percent (%) of isolates") +
guides(fill = FALSE) +
theme_classic() +
theme(
axis.text.x = element_text(
angle = 90,
size = 12,
hjust = 1,
vjust = 0.4
),
axis.title.x = element_blank(),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14),
strip.text = element_text(size = 12)
) +
facet_wrap(~ species)palette <- c("Broiler" = "#4575b4",
"Pig" = "#74add1",
"Red fox" = "#f46d43",
"Wild bird" = "#fdae61")
suppressWarnings(clean_mut_data %>%
mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
ref_nt = ifelse(ref_nt == ".", "", ref_nt),
ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
mutation = ifelse(mutation == "/-", "0", mutation)) %>%
select(ref, gene_names, flag, mutation) %>%
filter(flag %in% flag_selection,
gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
mutate(id = 1:n()) %>%
spread(gene_names, mutation) %>%
group_by(ref) %>%
summarise_all(funs(func_paste2)) %>%
dplyr::select(-c(id, flag)) %>%
left_join(id_data, by = "ref") %>%
dplyr::select(id, everything(), -ref) %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S")) %>%
gather(gene, mut, -id) %>%
mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
result = ifelse(mut != 0, 1, 0),
result = as.integer(result),
type = "mut") %>%
separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
gather(mut, value, paste("v", 1:12, sep = "_")) %>%
separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
select(-mut) %>%
dplyr::rename("mut" = aa_change) %>%
filter(is.na(nt_change) == FALSE) %>%
fix_gyr_par_results(.) %>%
filter(result_total == 1) %>%
left_join(isolate_data[c("id","species")], by = "id") %>%
group_by(gene, mut, species) %>%
dplyr::count() %>%
rowwise() %>%
mutate(Total = case_when(species == "Broiler" ~ 87,
species == "Pig" ~ 75,
species == "Red fox" ~ 52,
species == "Wild bird" ~ 66),
Percent = round(n/Total * 100,1),
lwr = round(get_binCI(n, Total)[1], 1),
upr = round(get_binCI(n, Total)[2], 1)) %>%
dplyr::rename("Mutation" = mut)) %>%
ggplot(aes(reorder(Mutation, -Percent), Percent, fill = species))+
geom_col(color = "black", position = position_dodge(0.9))+
geom_errorbar(aes(ymin = lwr, ymax = upr),
position = position_dodge(0.9),
width = 0.5)+
scale_fill_manual(values = palette)+
guides(fill = FALSE)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3),
axis.title.x = element_blank())+
facet_wrap(~ gene, scale = "free_x")qnr_genes <- c("qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")
acquired_report %>%
left_join(isolate_data, by = "id") %>%
gather(key, value, qnr_genes) %>%
group_by(key, value, species) %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = if_else(value == 1, "Present", "Absent")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(
Total = Present + Absent,
Percent = round(Present / Total * 100, 1)
) %>%
ggplot(aes(key, Percent, fill = species)) +
geom_col(color = "black",
position = position_dodge(0.9)) +
scale_fill_manual(values = palette) +
labs(y = "Percent (%) of isolates",
fill = NULL) +
theme_classic() +
theme(
axis.text = element_text(size = 12),
axis.title.y = element_text(size = 14),
legend.text = element_text(size = 12),
axis.title.x = element_blank(),
legend.justification = c(0, 1),
legend.position = c(0.82, 0.97)
)total_df <- acquired_report %>%
left_join(mut_report, by = "id") %>%
left_join(isolate_data[c("id","species")], by = "id") %>%
select(-c(nt_change, id)) %>%
mutate_at(vars(-species),
funs(as.numeric)) %>%
mutate(qnr = ifelse(rowSums(.[,2:8]) > 0, 1, 0)) %>%
{. ->> total_resistance_report} %>%
split(., f = .$species) %>%
lapply(., function(x) select(x, -species))
corr_matrices <- lapply(total_df, function(x) create_corr_matrix(x)) %>%
bind_rows(.id = "species") %>%
mutate(gene1 = gsub("(.*?)_.+", "\\1", var1),
gene2 = gsub("(.*?)_.+", "\\1", var2),
var1 = gsub(".+_(.*?)", "\\1", var1),
var2 = gsub(".+_(.*?)", "\\1", var2),
type1 = case_when(gene1 == "gyrA" ~ 1,
gene1 == "gyrB" ~ 2,
gene1 == "parC" ~ 3,
gene1 == "parE" ~ 4,
gene1 == "marR" ~ 5,
gene1 == "marA" ~ 6,
gene1 == "rpoB" ~ 7,
gene1 == "soxS" ~ 8,
gene1 == "robA" ~ 9,
gene1 == "gadX" ~ 10,
gene1 == "qnrA1" ~ 11,
gene1 == "qnrB19" ~ 12,
gene1 == "qnrB6" ~ 13,
gene1 == "qnrB60" ~ 14,
gene1 == "qnrS1" ~ 15,
gene1 == "qnrS2" ~ 16,
gene1 == "qnrS4" ~ 17,
gene1 == "qnr" ~ 18,
gene1 == "qepA4" ~ 19),
type2 = case_when(gene2 == "gyrA" ~ 1,
gene2 == "gyrB" ~ 2,
gene2 == "parC" ~ 3,
gene2 == "parE" ~ 4,
gene2 == "marR" ~ 5,
gene2 == "marA" ~ 6,
gene2 == "rpoB" ~ 7,
gene2 == "soxS" ~ 8,
gene2 == "robA" ~ 9,
gene2 == "gadX" ~ 10,
gene2 == "qnrA1" ~ 11,
gene2 == "qnrB19" ~ 12,
gene2 == "qnrB6" ~ 13,
gene2 == "qnrB60" ~ 14,
gene2 == "qnrS1" ~ 15,
gene2 == "qnrS2" ~ 16,
gene2 == "qnrS4" ~ 17,
gene2 == "qnr" ~ 18,
gene2 == "qepA4" ~ 19))
cols <- c("#e7f0fa","#c9e2f6","#95cbee","#0099dc","#4ab04a",
"#ffd73e","#eec73a","#e29421","#f05336","#ce472e")
ggplot(corr_matrices, aes(reorder(var1, type1), reorder(var2, type2), fill = value))+
geom_tile(color = "white")+
scale_fill_gradient(low = "black",
high = "white",
na.value = "grey95")+
labs(fill = NULL)+
theme_classic()+
theme(axis.text = element_text(size = 8),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4),
axis.title = element_blank())+
coord_fixed()+
facet_wrap(~species)total_report <- mut_report %>%
left_join(acquired_report, by = "id") %>%
dplyr::select(-nt_change) %>%
mutate_at(vars(-id),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,13:19]) > 0, 1, 0)) %>%
select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
gather(gene, value, -id) %>%
mutate(present = ifelse(value == 1, gene, NA),
ref = 1:n()) %>%
spread(gene, value) %>%
group_by(id) %>%
summarise_all(funs(func_paste)) %>%
select(-ref) %>%
left_join(mic_data[c("id","CIP")], by = "id") %>%
group_by(present,CIP) %>%
dplyr::count() %>%
ungroup() %>%
rowwise() %>%
mutate(present = ifelse(present == "", "None", present),
total = 280,
percent = round(n/total * 100, 1),
lwr = round(get_binCI(n, total)[1], 1),
upr = round(get_binCI(n, total)[2], 1)) %>%
ungroup() %>%
mutate(entries = sapply(strsplit(.$present, ","),
FUN = function(x) {length(x)}))
genes <- c(names(mut_report), names(acquired_report), "qnr")
genes <- genes[!genes %in% c("id","nt_change","qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")]
n_plot <- ggplot(total_report, aes(reorder(present, entries), percent, fill = factor(CIP)))+
geom_col(color = "black")+
labs(y = "Percent (%) isolates")+
scale_fill_viridis(discrete = TRUE)+
guides(fill=guide_legend(ncol = 10))+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 8),
axis.title.x = element_blank(),
legend.text = element_text(size = 8),
legend.title = element_blank(),
legend.position = "top",
legend.spacing.x = unit(0.2, "cm"),
legend.justification = "center")
dot_report <- mut_report %>%
left_join(acquired_report, by = "id") %>%
select(-nt_change) %>%
mutate_at(vars(-id),
funs(as.numeric(.))) %>%
mutate(qnr = ifelse(rowSums(.[,13:19]) > 0, 1, 0)) %>%
select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
gather(gene, value, -id) %>%
mutate(present = ifelse(value == 1, gene, NA),
ref = 1:n()) %>%
spread(gene, value) %>%
group_by(id) %>%
summarise_all(funs(func_paste)) %>%
left_join(mic_data[c("id","CIP")], by = "id") %>%
select(-c(ref, id)) %>%
ungroup() %>%
mutate_at(vars(-present, CIP), .funs = as.numeric) %>%
mutate(none = ifelse(rowSums(.[,genes]) == 0, 1, 0),
present = ifelse(present == "", "None", present)) %>%
gather(gene, value, -c(present,CIP)) %>%
mutate(gene = ifelse(value == 0 & gene != "None", NA, gene),
value = ifelse(gene == "None" & value == 0, NA, value)) %>%
na.omit() %>%
mutate(entries = sapply(strsplit(.$present, ","),
FUN = function(x) {length(x)}))
dot_plot <- ggplot(dot_report, aes(reorder(present, entries), gene, fill = gene))+
geom_point(size = 2)+
scale_y_discrete(limits = c("gyrA","parC","parE","gadX","marA","marR","rpoB","soxS","qepA4","qnr"))+
guides(fill = FALSE)+
theme(axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
axis.title = element_blank(),
axis.ticks.x = element_blank())
plot_grid(n_plot, dot_plot, nrow = 2, align = "v", rel_heights = c(3.5/5, 1.5/5))mic_data %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
select(species,contains("_R")) %>%
gather(compound, value, -species) %>%
mutate(compound = sub("_R", "", compound)) %>%
group_by_all() %>%
dplyr::count() %>%
ungroup() %>%
mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
spread(value, n, fill = 0) %>%
rowwise() %>%
mutate(Total = Present + Absent,
Percent = round((Present/Total)*100, 1),
lwr = round(get_binCI(Present, Total)[1], 1),
upr = round(get_binCI(Present, Total)[2], 1),
species = factor(species, levels = c("Broiler","Pig","Wild bird","Red fox"))) %>%
ggplot(aes(reorder(compound, -Percent), Percent, fill = species))+
geom_col(color = "black")+
geom_errorbar(aes(ymin = lwr, ymax = upr),
width = 0.6)+
guides(fill = FALSE)+
scale_fill_manual(values = palette)+
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+
facet_wrap(~species)mic_data[,c("id","CIP")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
group_by(CIP,species) %>%
dplyr::count() %>%
ggplot(aes(factor(CIP), n, fill = species))+
geom_col(color = "black", position = position_fill())+
labs(y = "Percent isolates",
x = "CIP MIC")+
scale_fill_manual(values = palette)+
scale_y_continuous(labels = percent_format())+
theme(legend.title = element_blank())mic_data[,c("id","NAL")] %>%
left_join(isolate_data[,c("id","species")], by = "id") %>%
group_by(NAL,species) %>%
dplyr::count() %>%
ggplot(aes(factor(NAL), n, fill = species))+
geom_col(color = "black", position = position_fill())+
labs(y = "Percent isolates",
x = "NAL MIC")+
scale_fill_manual(values = palette)+
scale_y_continuous(labels = percent_format())+
theme(legend.title = element_blank())The red outliers have been excluded from the cgMLST analysis due to too many missing alleles
cgMLSTdata <- read.table("../data/cgMLST.txt",
sep = "\t",
colClasses = "factor",
header = T)
cgMLSTdata[cgMLSTdata == "NaN"] <- NA
cgMLSTdata2 <- cgMLSTdata %>%
mutate(n_NA = apply(., 1, function(x) sum(is.na(x))),
group = 1)
ggplot(cgMLSTdata2, aes(factor(group), n_NA)) +
geom_boxplot(outlier.size = 2) +
geom_point(data = cgMLSTdata2[cgMLSTdata2$n_NA > 500,], aes(factor(group), n_NA),
pch = 21,
size = 2,
fill = "red") +
labs(x = NULL,
y = "Number of NA's per isolate") +
theme_classic() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())palette3 <- c("1-A" = "#fc8d59",
"1-I" = "#80b1d3",
"0" = "grey95")
qr_genes <- mut_report[c("id","gyrA","parC","parE","rpoB","marA","marR","gadX","soxS")] %>%
mutate_at(vars(-id),
funs(ifelse(. == "1", "1-I", "0")))
ac_genes <- acquired_report %>%
mutate_at(vars(-id),
funs(as.numeric)) %>%
mutate(qnr = ifelse(rowSums(.[,3:9]) != 0 , 1, 0)) %>%
select(id, qepA4, qnr) %>%
mutate_at(vars(-id),
funs(ifelse(. == "1", "1-A", "0")))
total_data <- qr_genes %>%
left_join(ac_genes, by = "id")
total_data_id <- total_data %>%
column_to_rownames("id")
metadata_total <- mlst_data[,c("id","ST")] %>%
left_join(isolate_data[,c("id", "species")], by = "id")
cgMLSTdata_complete <- cgMLSTdata2 %>%
filter(n_NA < 500) %>%
select(-n_NA) %>%
column_to_rownames("Genome")
total_tree <- as.phylo(hclust(daisy(cgMLSTdata_complete, metric = "gower"), method = "average"))
total_tree_annotated <- ggtree(total_tree,
layout = "circular",
color = "#808080",
size = 0.5) %<+% metadata_total +
geom_tippoint(aes(color = species),
size = 3) +
geom_tiplab2(aes(label = ST),
offset = 0.01,
size = 4) +
scale_color_manual(values = palette)
total_tree_opened <- rotate_tree(open_tree(total_tree_annotated, 8), 93)
gheatmap(total_tree_opened,
total_data_id,
offset = 0.04,
width = 0.3,
colnames_offset_y = 2.5,
colnames_position = "top",
font.size = 5) +
scale_fill_manual(values = palette3)roary_tree <- read.tree("../data/Roary/accessory_binary_genes.fa.newick")
tree_refs <- roary_tree$tip.label
id_data2 <- id_data %>%
mutate(test = ref %in% tree_refs) %>%
filter(test == TRUE)
id_df <- left_join(data.frame(ref = tree_refs), id_data2, by = "ref") %>%
mutate(id = ifelse(is.na(id) == TRUE, ref, id))
roary_tree$tip.label <- id_df$id
total_data2 <- total_data %>%
filter(id %in% id_df$id) %>%
column_to_rownames("id")
metadata_total2 <- metadata_total %>%
filter(id %in% id_df$id)
roary_tree_colored <- ggtree(roary_tree,
layout = "circular",
color = "#808080",
size = 1) %<+% metadata_total2 +
geom_tiplab2(aes(label = ST, color = species),
offset = 0.01,
size = 6,
align = TRUE) +
scale_color_manual(values = palette)
total_tree_opened2 <- rotate_tree(open_tree(roary_tree_colored, 8), 93)
gheatmap(total_tree_opened2,
total_data2,
offset = 0.065,
width = 0.3,
colnames_offset_y = 1.9,
colnames_position = "top",
font.size = 5) +
scale_fill_manual(values = palette3)palette3 <- c("1-A" = "#fc8d59",
"1-I" = "#80b1d3",
"0" = "grey95")
qr_genes <- mut_report[c("id","gyrA","parC","parE","rpoB","marA","marR","gadX","soxS")] %>%
mutate_at(vars(-id),
funs(ifelse(. == "1", "1-I", "0")))
ac_genes <- acquired_report %>%
mutate_at(vars(-id),
funs(as.numeric)) %>%
mutate(qnr = ifelse(rowSums(.[,3:9]) != 0 , 1, 0)) %>%
select(id, qepA4, qnr) %>%
mutate_at(vars(-id),
funs(ifelse(. == "1", "1-A", "0")))
cgMLSTdata <- read.table("../data/cgMLST.txt",
sep = "\t",
colClasses = "factor",
header = T)
cgMLSTdata[cgMLSTdata == "NaN"] <- NA
cgMLSTdata2 <- cgMLSTdata %>%
mutate(n_NA = apply(., 1, function(x) sum(is.na(x)))) %>%
filter(n_NA < 500) %>%
select(-n_NA) %>%
column_to_rownames("Genome")
broiler_iso <- isolate_data[c("id", "species")] %>%
filter(species == "Broiler") %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S"))
pig_iso <- isolate_data[c("id", "species")] %>%
filter(species == "Pig") %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S"))
fox_iso <- isolate_data[c("id", "species")] %>%
filter(species == "Red fox") %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S"))
bird_iso <- isolate_data[c("id", "species")] %>%
filter(species == "Wild bird") %>%
filter(id %not_in% c("2016-17-565-1-S",
"2016-02-428-2-S",
"2016-02-486-2-S",
"2016-02-732-2-S",
"2014-01-1741-1-S"))
rawdata_broiler <- cgMLSTdata2 %>%
rownames_to_column("id") %>%
filter(id %in% broiler_iso$id) %>%
dplyr::rename("Genome" = `id`) %>%
select(Genome, everything()) %>%
left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
{. ->> data_broiler } %>%
select(-ST)
rawdata_pig <- cgMLSTdata2 %>%
rownames_to_column("id") %>%
filter(id %in% pig_iso$id) %>%
dplyr::rename("Genome" = `id`) %>%
select(Genome, everything()) %>%
left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
{. ->> data_pig } %>%
select(-ST)
rawdata_fox <- cgMLSTdata2 %>%
rownames_to_column("id") %>%
filter(id %in% fox_iso$id) %>%
dplyr::rename("Genome" = `id`) %>%
select(Genome, everything()) %>%
left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
{. ->> data_fox } %>%
select(-ST)
rawdata_bird <- cgMLSTdata2 %>%
rownames_to_column("id") %>%
filter(id %in% bird_iso$id) %>%
dplyr::rename("Genome" = `id`) %>%
select(Genome, everything()) %>%
left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
{. ->> data_bird } %>%
select(-ST)
write.table(rawdata_broiler, "../data/cgMLST_broiler.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_pig, "../data/cgMLST_pig.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_fox, "../data/cgMLST_fox.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_bird, "../data/cgMLST_bird.txt", sep = "\t", row.names = FALSE)
cgMLSTdata_broiler <- read.table("../data/cgMLST_broiler.txt",
sep = "\t",
row.names=1,
colClasses = "factor",
header = T)
cgMLSTdata_broiler[cgMLSTdata_broiler == "NaN"] <- NA
cgMLST_tree_broiler <- as.phylo(hclust(daisy(cgMLSTdata_broiler,
metric="gower"), method = "average"))
cgMLSTdata_pig <- read.table("../data/cgMLST_pig.txt",
sep = "\t",
row.names=1,
colClasses = "factor",
header = T)
cgMLSTdata_pig[cgMLSTdata_pig == "NaN"] <- NA
cgMLST_tree_pig <- as.phylo(hclust(daisy(cgMLSTdata_pig,
metric="gower"), method = "average"))
cgMLSTdata_fox <- read.table("../data/cgMLST_fox.txt",
sep = "\t",
row.names=1,
colClasses = "factor",
header = T)
cgMLSTdata_fox[cgMLSTdata_fox == "NaN"] <- NA
cgMLST_tree_fox <- as.phylo(hclust(daisy(cgMLSTdata_fox,
metric="gower"), method = "average"))
cgMLSTdata_bird <- read.table("../data/cgMLST_bird.txt",
sep = "\t",
row.names=1,
colClasses = "factor",
header = T)
cgMLSTdata_bird[cgMLSTdata_bird == "NaN"] <- NA
cgMLST_tree_bird <- as.phylo(hclust(daisy(cgMLSTdata_bird,
metric="gower"), method = "average"))
broiler_tree <- ggtree(cgMLST_tree_broiler,
layout = "circular",
color = "#808080",
size = 1) %<+% data_broiler +
geom_tippoint(size = 4,
color = "#4575b4")+
geom_tiplab2(aes(label = ST),
offset = 0.01,
size = 8)+
ggtitle("Broiler")+
theme(plot.title = element_text(size = 40))
pig_tree <- ggtree(cgMLST_tree_pig,
layout = "circular",
color = "#808080",
size = 1) %<+% data_pig +
geom_tippoint(size = 4,
color = "#74add1")+
geom_tiplab2(aes(label = ST),
offset = 0.01,
size = 8)+
ggtitle("Pig")+
theme(plot.title = element_text(size = 40))
fox_tree <- ggtree(cgMLST_tree_fox,
layout = "circular",
color = "#808080",
size = 1) %<+% data_fox +
geom_tippoint(size = 4,
color = "#f46d43")+
geom_tiplab2(aes(label = ST),
offset = 0.01,
size = 8)+
ggtitle("Red fox")+
theme(plot.title = element_text(size = 40))
bird_tree <- ggtree(cgMLST_tree_bird,
layout = "circular",
color = "#808080",
size = 1) %<+% data_bird +
geom_tippoint(size = 4,
color = "#fdae61")+
geom_tiplab2(aes(label = ST),
offset = 0.01,
size = 8)+
ggtitle("Wild bird")+
theme(plot.title = element_text(size = 40))
total_data <- qr_genes %>%
left_join(ac_genes, by = "id")
broiler_data <- total_data %>%
filter(id %in% broiler_iso$id) %>%
column_to_rownames("id")
pig_data <- total_data %>%
filter(id %in% pig_iso$id) %>%
column_to_rownames("id")
fox_data <- total_data %>%
filter(id %in% fox_iso$id) %>%
column_to_rownames("id")
bird_data <- total_data %>%
filter(id %in% bird_iso$id) %>%
column_to_rownames("id")
fan_tree_broiler <- open_tree(broiler_tree, 10)
rotated_fan_tree_broiler <- rotate_tree(fan_tree_broiler, 93)
broiler_annotated_tree <- gheatmap(rotated_fan_tree_broiler,
broiler_data,
offset = 0.06,
width = 0.5,
colnames_offset_y = 0.7,
colnames_position = "top",
font.size = 8,
color = "grey90") +
scale_fill_manual(values = palette3) +
guides(fill = FALSE)
fan_tree_pig <- open_tree(pig_tree, 10)
rotated_fan_tree_pig <- rotate_tree(fan_tree_pig, 92)
pig_annotated_tree <- gheatmap(rotated_fan_tree_pig,
pig_data,
offset = 0.06,
width = 0.5,
colnames_offset_y = 0.55,
colnames_position = "top",
font.size = 8,
color = "grey90") +
scale_fill_manual(values = palette3) +
guides(fill = FALSE)
fan_tree_fox <- open_tree(fox_tree, 10)
rotated_fan_tree_fox <- rotate_tree(fan_tree_fox, 92)
fox_annotated_tree <- gheatmap(rotated_fan_tree_fox,
fox_data,
offset = 0.06,
width = 0.5,
colnames_offset_y = 0.2,
colnames_position = "top",
font.size = 8,
color = "grey90") +
scale_fill_manual(values = palette3) +
guides(fill = FALSE)
fan_tree_bird <- open_tree(bird_tree, 10)
rotated_fan_tree_bird <- rotate_tree(fan_tree_bird, 93)
bird_annotated_tree <- gheatmap(rotated_fan_tree_bird,
bird_data,
offset = 0.06,
width = 0.5,
colnames_offset_y = 0.4,
colnames_position = "top",
font.size = 8,
color = "grey90") +
scale_fill_manual(values = palette3) +
guides(fill = FALSE)
broiler_annotated_treepig_annotated_treefox_annotated_treebird_annotated_treedist_broiler <- cophenetic.phylo(cgMLST_tree_broiler)
dist_broiler[dist_broiler > 0.95] <- NA
dist_pig <- cophenetic.phylo(cgMLST_tree_pig)
dist_pig[dist_pig > 0.95] <- NA
dist_fox <- cophenetic.phylo(cgMLST_tree_fox)
dist_fox[dist_fox > 0.95] <- NA
dist_bird <- cophenetic.phylo(cgMLST_tree_bird)
dist_bird[dist_bird > 0.95] <- NA
print(paste0("Broiler median distance: ",
round(median(dist_broiler, na.rm = TRUE), 2)))## [1] "Broiler median distance: 0.88"
print(paste0("Pig median distance: ",
round(median(dist_pig, na.rm = TRUE), 2)))## [1] "Pig median distance: 0.87"
print(paste0("Red fox median distance: ",
round(median(dist_fox, na.rm = TRUE), 2)))## [1] "Red fox median distance: 0.88"
print(paste0("Wild bird median distance: ",
round(median(dist_bird, na.rm = TRUE), 2)))## [1] "Wild bird median distance: 0.86"
par(mfrow = c(2,2))
x1 <- hist(dist_broiler,
breaks = 20,
plot = FALSE)
x1$density = x1$counts/sum(x1$counts)*100
plot(x1,
freq = FALSE,
main = "Broiler distance frequency",
xlab = "Distance",
ylim = c(0, 50))
x2 <- hist(dist_pig,
breaks = 20,
plot = FALSE)
x2$density = x2$counts/sum(x2$counts)*100
plot(x2,
freq = FALSE,
main = "Pig distance frequency",
xlab = "Distance",
ylim = c(0, 50))
x3 <- hist(dist_fox,
breaks = 20,
plot = FALSE)
x3$density = x3$counts/sum(x3$counts)*100
plot(x3,
freq = FALSE,
main = "Red fox distance frequency",
xlab = "Distance",
ylim = c(0, 50))
x4 <- hist(dist_bird,
breaks = 20,
plot = FALSE)
x4$density = x4$counts/sum(x4$counts)*100
plot(x4,
freq = FALSE,
main = "Wild bird distance frequency",
xlab = "Distance",
ylim = c(0, 50))